@@ -4,7 +4,6 @@ require 'date'
 
 module Agents
   class WebsiteAgent < Agent
-    cannot_receive_events!
 
     default_schedule "every_12h"
 
@@ -46,6 +45,8 @@ module Agents
       Set `uniqueness_look_back` to limit the number of events checked for uniqueness (typically for performance). This defaults to the larger of #{UNIQUENESS_LOOK_BACK} or #{UNIQUENESS_FACTOR}x the number of detected received results.
 
       Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset.
+
+      The WebsiteAgent can also scrape based on incoming events. It will scrape the URL contained in the `url` key of the incoming event payload.
     MD
 
     event_description do
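
Concretely, the contract the new doc line describes is just an event whose payload carries a `url` key. Something like the following from an upstream agent (payload contents illustrative, not from the patch) would trigger a scrape, with the agent's existing `extract` options applied to the fetched page:

    # Hypothetical payload emitted by an upstream agent:
    { 'url' => 'http://xkcd.com' }
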
@@ -105,19 +106,23 @@ module Agents
     end
 
     def check
-      hydra = Typhoeus::Hydra.new
       log "Fetching #{options['url']}"
+      check_url options['url']
+    end
+
+    def check_url(in_url)
+      hydra = Typhoeus::Hydra.new
       request_opts = { :followlocation => true }
       request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present?
 
       requests = []
 
-      if options['url'].kind_of?(Array)
-        options['url'].each do |url|
+      if in_url.kind_of?(Array)
+        in_url.each do |url|
           requests.push(Typhoeus::Request.new(url, request_opts))
         end
       else
-        requests.push(Typhoeus::Request.new(options['url'], request_opts))
+        requests.push(Typhoeus::Request.new(in_url, request_opts))
       end
 
       requests.each do |request|
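
After this hunk, `check` is a thin wrapper and the old body lives in `check_url`, which still accepts either a single URL string or an array of URLs, exactly as `options['url']` could before. A minimal sketch of the two call shapes (receiver name assumed):

    # What `check` now does internally with options['url']:
    agent.check_url('http://xkcd.com')

    # The Array branch is preserved for multi-URL configurations:
    agent.check_url(['http://xkcd.com', 'http://example.com/feed'])
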
@@ -185,7 +190,7 @@ module Agents
             options['extract'].keys.each do |name|
               result[name] = output[name][index]
               if name.to_s == 'url'
-                result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
+                result[name] = URI.join(request.base_url, result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
               end
             end
 
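
This is the subtle half of the change: a relative extracted `url` used to be resolved against the configured `options['url']`, which breaks once one agent fetches many pages (URL arrays, and now incoming events). `request.base_url` is the URL this particular Typhoeus request actually fetched, so each relative link resolves against its own page. The `URI.join` behavior being relied on:

    require 'uri'

    # Relative path resolved against the page it was scraped from:
    URI.join('http://xkcd.com/', '/about').to_s
    # => "http://xkcd.com/about"

    # Resolving against the wrong base yields a plausible but wrong link:
    URI.join('http://example.com/feed', '/about').to_s
    # => "http://example.com/about"
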
@@ -202,6 +207,13 @@ module Agents
       end
     end
 
+    def receive(incoming_events)
+      incoming_events.each do |event|
+        url_to_scrape = Utils.value_at(event['payload'], 'url')
+        check_url(url_to_scrape)
+      end
+    end
+
     private
 
     # This method returns true if the result should be stored as a new event.
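
`Utils.value_at` is Huginn's payload lookup helper (it resolves a path expression rather than doing a bare hash access, if I'm reading the helper right). The flow this method wires up, roughly (names illustrative):

    # Sketch of the receive flow, assuming a configured WebsiteAgent:
    event = Event.new
    event.payload = { 'url' => 'http://xkcd.com' }

    website_agent.receive([event])
    # -> Utils.value_at(payload, 'url')  # => "http://xkcd.com"
    # -> check_url("http://xkcd.com")    # normal fetch + extract pipeline

Note this only works because `cannot_receive_events!` was dropped in the first hunk. The remaining hunks are the matching spec changes.
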
@@ -275,5 +287,7 @@ module Agents
         false
       end
     end
+
   end
+
 end
@@ -331,6 +331,19 @@ describe Agents::WebsiteAgent do
         end
       end
     end
+
+    describe "#receive" do
+      it "should scrape from the url element in incoming event payload" do
+        @event = Event.new
+        @event.agent = agents(:bob_rain_notifier_agent)
+        @event.payload = { 'url' => "http://xkcd.com" }
+
+        lambda {
+          @checker.options = @site
+          @checker.receive([@event])
+        }.should change { Event.count }.by(1)
+      end
+    end
   end
 
   describe "checking with http basic auth" do
@@ -361,4 +374,4 @@ describe Agents::WebsiteAgent do
       end
     end
   end
-end
+end